##################################################################################################
### Observational BLM Protest Dataset Analysis
### Replication Package associated with Perspectives on Politics article
### "How Police Behavior Shapes Perceptions of Protests:Evidence from Black Lives Matter"
### Jasmine English, Ariel White, Laurel Eckhouse
### with thanks to Alisa Robinson for her original BLM protest dataset on elephrame.net,
### and our many wonderful RA's who did a lot of this handcoding (see paper acknowledgements)
### Autumn 2024
##################################################################################################

library(zoo)
library(data.table)
library(rgeos)
library(sp)
library(rgdal)
library(maps)
library(stargazer) 

#load in main cleaned protest dataset (see paper/SI for description of construction)
load("fullcleanedBLMprotests_July2014March2017_supplementingRobinson2017.Rdata")
dim(protestobsclean)

##################################################################################################
### Map of protest sizes (Figure 1)

pointcolor <-  rgb(red = .4, green = .4, blue = .6, alpha = 0.7)

pdf(file="./protestmapscaled.pdf")
maps::map("state", interior=FALSE) #draw map of US
maps::map("state", boundary=FALSE, col="gray", add=TRUE) #add in borders
title("Black Lives Matter Protests, July 2014-March 2017")

points(x = protestobsclean[protestobsclean$protestsizefactor=="0-50",]$INTPTLONG, y =  protestobsclean[protestobsclean$protestsizefactor=="0-50",]$INTPTLAT, col =pointcolor, cex=.6) 
points(x = protestobsclean[protestobsclean$protestsizefactor=="50-100",]$INTPTLONG, y =  protestobsclean[protestobsclean$protestsizefactor=="50-100",]$INTPTLAT, col =pointcolor, cex=1)
points(x = protestobsclean[protestobsclean$protestsizefactor=="100-1000",]$INTPTLONG, y =  protestobsclean[protestobsclean$protestsizefactor=="100-1000",]$INTPTLAT, col = pointcolor, cex=2)
points(x = protestobsclean[protestobsclean$protestsizefactor=="1000+",]$INTPTLONG, y =  protestobsclean[protestobsclean$protestsizefactor=="1000+",]$INTPTLAT, col = pointcolor, cex=3)

legend(x = "bottomright",          # Position
       legend = c("0-50", "50-100", "100-1000", "1000+"),  # Legend texts
       pch = c(1,1,1,1), cex=1.02,
       col = c(pointcolor),           # Line colors
       pt.cex = c(.6, 1, 2, 3), title="Protest size")  
dev.off()


##################################################################################################
### Plot of protest density through time (SI Figure A1)

protestobsclean$protestmonth <-format(protestobsclean$protestdate, "%m-%Y")
protestobsclean$protestmth <-format(protestobsclean$protestdate, "%m")
protestobsclean$protestyear <-format(protestobsclean$protestdate, "%Y")
summary(protestobsclean$protestmonth)
protestsdatatab <- data.table(protestobsclean)
protestsdatatab[, observation:= 1]
protestmonths <- protestsdatatab[, list(protestscount = sum(observation)), by=list(protestmonth, protestmth, protestyear)]
setkey(protestmonths, "protestyear", "protestmth")
protestmonths <- protestmonths[-1,] #drop the row with NA dates-- 2 protests. 

pdf("./protestobs_overtime_month.pdf")
plot(protestmonths$protestscount, axes=F, ylab="Monthly protest count", main="BLM Protests Over Time", xlab="", type="b",ylim=c(0, 200))
axis(side=1, at=1:nrow(protestmonths), labels=protestmonths$protestmonth, las=2)
axis(side=2)
dev.off()

##################################################################################################
### Text description: what US population saw, protest-wise, in 2014 and 2015.

### how many states?
dim(table(protestobsclean$stateabbrev)) #45 states & DC

### how many cities saw protests for Nat'l Moment of Silence
waveNMOS14 <- protestobsclean[protestobsclean$protestdate=="2014-08-14",]; dim(waveNMOS14)
length(unique(waveNMOS14$city)) #88

### exposure to protests, esp in 2014 as described in paper
table(protestobsclean$protestyear)
p2014 <- data.table(protestobsclean[protestobsclean$protestyear=="2014",]); dim(p2014)
p1415 <- data.table(protestobsclean[protestobsclean$protestyear=="2014"| protestobsclean$protestyear=="2015",]); dim(p1415)
length(unique(p2014$AFFGEOID_COUNTY));length(unique(p1415$AFFGEOID_COUNTY)) #not that many more when including 2015

p2014[, protesttag:=1]
counties14 <- p2014[, list(total14protests = sum(protesttag), anyshutdowns = max(shutitdown, na.rm=T), anypolicepresence = max(policepresenceflat, na.rm=T), anyarrests = max(anyarrestsflat, na.rm=T)), by=c("AFFGEOID_COUNTY", "pop_cou","county", "state.y", "COUNTYFP_COUNTY", "STATEFP_COUNTY")]
dim(counties14)
sum(counties14$pop_cou, na.rm=T) #this is over a third of the US population that saw a protest in their county by the end of 2014.  
#US pop in 2016 was 323,127,515: https://factfinder.census.gov/faces/tableservices/jsf/pages/productview.xhtml?pid=ACS_16_1YR_S0501&prodType=table
sum(counties14$pop_cou, na.rm=T)/323127515

### look at exposure including neighboring counties (paper says: "over 70% had a protest in their own or a neighboring county")
#pull in census county adjacency file (downloaded from here 6/3/19: https://www.census.gov/programs-surveys/geograrphy/technical-documentation/records-layout/county-adjacency-record-layout.html
adj <- as.data.table(read.table("./county_adjacency.txt", header=F, sep="\t", colClasses=c("character", "character", "character", "character"))); head(adj, 12)
colnames(adj) <- c("countyname", "countyfips", "neighborname", "neighborfips")
adj[countyname=="", countyname:=NA]; adj[, countyname:= na.locf(countyname, na.rm=FALSE)] 
adj[countyfips=="", countyfips:=NA]; adj[, countyfips:= na.locf(countyfips, na.rm=FALSE)] 
#pull in a full dataset of all the counties in the US (with 2014 population data) and for each of them
#flag whether they had a protest, and whether they neighbored a county with a protest (as of 2014 or over the whole period) 

allcounties <- read.csv("./PEP_2014_PEPANNRES_with_ann.csv", stringsAsFactors=F) #ACS pop estimates for all counties 2010-2014
allcounties <- as.data.table(allcounties[-1,]) 
countieswprotests14 <- counties14$AFFGEOID_COUNTY #which counties saw protests in 2014?

allcounties[, protest14:= 0]; allcounties[GEO.id %in% countieswprotests14, protest14:= 1]; table(allcounties$protest14)
protestfips14 <- allcounties[protest14==1, "GEO.id2"]; dim(protestfips14) #grab all the identifiers for counties that saw protests
protestadjacentfips14 <- adj[countyfips %in% protestfips14$GEO.id2, "neighborfips"]; dim(protestadjacentfips14) #now grab the identifiers for the counties adjacent to those ones
allcounties[, protestadjacent14:=0]; allcounties[GEO.id2 %in% protestadjacentfips14$neighborfips, protestadjacent14:=1] #mark them accordingly
table(allcounties$protest14); table(allcounties$protestadjacent14)
allcounties[, countypop14:= as.numeric(respop72014)]
sum(allcounties$countypop14, na.rm=T)
sum(allcounties$countypop14[allcounties$protestadjacent14==1], na.rm=T)/sum(allcounties$countypop14, na.rm=T) #most people. 
sum(allcounties$countypop14[allcounties$protestadjacent14==1|allcounties$protest14==1], na.rm=T)/sum(allcounties$countypop14, na.rm=T) 
sum(allcounties$countypop14[allcounties$protest14==1], na.rm=T)/sum(allcounties$countypop14, na.rm=T) #reproduce exposure est from above using 2014 pop data

##################################################################################################
### Regressions/Tables

### Descriptive Statistics
rownamesdesc <- c(paste("All Protests", " (", nrow(protestobsclean), ")", sep=""), 
	paste("Highway-Blockage Protests", " (", nrow(protestobsclean[protestobsclean$highwayblockage==1,]), ")", sep=""),
	paste("Other-Disruption Protests", " (", nrow(protestobsclean[protestobsclean$shutitdown==1,]), ")", sep=""), 
	paste("After-Dark Protests", " (", nrow(protestobsclean[protestobsclean$afterdark==1,]), ")", sep=""), 
	paste("Policing-Focused Protests", " (", nrow(protestobsclean[protestobsclean$aboutpolicing==1,]), ")", sep=""))

rowfunc <- function(dataset){
	fractionpolicepresent <- mean(dataset$policepresenceflat, na.rm=T)
	fractionarrests <- mean(dataset$anyarrestsflat, na.rm=T)
	fractioncrowdcontrol <- mean(dataset$crowdcontrol, na.rm=T)
	return(c(fractionpolicepresent, fractionarrests, fractioncrowdcontrol))
}

rowfunc(protestobsclean)
desctab <- rbind(rowfunc(protestobsclean), rowfunc(protestobsclean[protestobsclean$highwayblockage==1,]),  rowfunc(protestobsclean[protestobsclean$shutitdown==1,]),  rowfunc(protestobsclean[protestobsclean$afterdark==1,]),  rowfunc(protestobsclean[protestobsclean$aboutpolicing==1,]))
rownames(desctab) <- rownamesdesc
colnames(desctab) <- c("Police Presence", "Any Arrests", "Other Crowd Control")
### Table A3 in SI
stargazer(desctab, digits=2, out="./protestoutcomedescriptives.tex", label="outcomedescription", title="Outcome Variable Means by Protest Features")


#protest characteristics and outcomes
police1 <- lm(policepresenceflat ~ highwayblockage + shutitdown + afterdark,  data=protestobsclean); summary(police1) 

police2 <- lm(policepresenceflat ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000+ mostlyblackprotesters +aboutpolicing,  data=protestobsclean); summary(police2) 

police3 <- lm(policepresenceflat ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 + mostlyblackprotesters+aboutpolicing +popthousands +as.factor(stateabbrev) ,  data=protestobsclean); summary(police3)

police4 <- lm(policepresenceflat ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 + mostlyblackprotesters+aboutpolicing +popthousands +as.factor(city) ,  data=protestobsclean); summary(police4)


arrests1 <- lm(anyarrestsflat ~ highwayblockage + shutitdown + afterdark,  data=protestobsclean); summary(arrests1) 

arrests2 <- lm(anyarrestsflat ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000+ mostlyblackprotesters+aboutpolicing,  data=protestobsclean); summary(arrests2) 

arrests3 <- lm(anyarrestsflat ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 + mostlyblackprotesters+aboutpolicing +popthousands +as.factor(stateabbrev) ,  data=protestobsclean); summary(arrests3)
 
arrests4 <- lm(anyarrestsflat ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 + mostlyblackprotesters+aboutpolicing +popthousands +as.factor(city) ,  data=protestobsclean); summary(arrests4) 

cc1 <- lm(crowdcontrol ~ highwayblockage + shutitdown + afterdark,  data=protestobsclean); summary(cc1) 

cc2 <- lm(crowdcontrol ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000+ mostlyblackprotesters+aboutpolicing,  data=protestobsclean); summary(cc2) 

cc3 <- lm(crowdcontrol ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 + mostlyblackprotesters+aboutpolicing +popthousands +as.factor(stateabbrev) ,  data=protestobsclean); summary(cc3)

cc4 <- lm(crowdcontrol ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 + mostlyblackprotesters+aboutpolicing +popthousands +as.factor(city) ,  data=protestobsclean); summary(cc4)

### Table A1 in SI
stargazer(police1, police2, police3,arrests1, arrests2, arrests3, cc1, cc2, cc3, out="protestcharacteristicslong.tex", 
covariate.labels = c("Highway Blockage", "Other Disruption", "After Dark", "Protest Size Under 50", "Protest Size 50-100", "Protest Size 100-1000", "Majority-Black Protesters", "Policing-focused Protest","Municipal Population (Thousands)"), 
omit        = "stateabbrev",
add.lines = list(c("State FE", "", "", "X", "", "", "X", "", "", "X")),
dep.var.labels   = c("Any Police Presence", "Any Arrests Made", "Crowd Control Measures"), 
star.cutoffs = c(0.05), notes="$^{*}$p$<$0.05", notes.append=FALSE,
omit.stat=c("LL","ser","f"),
font.size="scriptsize",
label="protestcharacteristicslong", title="Protest Characteristics and Police Response (Extra Specifications)")

### Table 1 in paper
stargazer(police1, police2,arrests1, arrests2, cc1, cc2, out="protestcharacteristics1.tex", 
covariate.labels = c("Highway Blockage", "Other Disruption", "After Dark", "Protest Size Under 50", "Protest Size 50-100", "Protest Size 100-1000", "Majority-Black Protesters", "Policing-focused Protest"), 
omit        = "stateabbrev",
omit.labels = "State FE",
dep.var.labels   = c("Any Police Presence", "Any Arrests Made", "Crowd Control Measures"), 
star.cutoffs = c(0.05), notes="$^{*}$p$<$0.05", notes.append=FALSE,
omit.stat=c("LL","ser","f"),
label="protestcharacteristics1", title="Protest Characteristics and Police Response")

### additional reviewer requested specifications
police6 <- lm(policepresenceflat ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 +aboutpolicing +popthousands +anyprotesterinjuries +anypoliceinjuries +as.factor(county) +as.factor(protestmonth) ,  data=protestobsclean); summary(police6)

arrests6 <- lm(anyarrestsflat ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 +aboutpolicing +popthousands +anyprotesterinjuries +anypoliceinjuries +as.factor(county)+as.factor(protestmonth) ,  data=protestobsclean); summary(arrests6) 
 
cc6 <- lm(crowdcontrol ~ highwayblockage + shutitdown + afterdark + protestsize050 + protestsize50100 + protestsize1001000 +aboutpolicing +popthousands +anyprotesterinjuries +anypoliceinjuries +as.factor(county)+as.factor(protestmonth) ,  data=protestobsclean); summary(cc6)

### Table A2 in SI 
stargazer(police6, arrests6, cc6, out="protestcharacteristics_forR6.tex", 
covariate.labels = c("Highway Blockage", "Other Disruption", "After Dark", "Protest Size Under 50", "Protest Size 50-100", "Protest Size 100-1000", "Policing-focused Protest","Municipal Population (Thousands)", "Protester Injuries Reported (0/1)", "Police Injuries Reported (0/1)"), 
omit        = c("county", "protestmonth"),
add.lines = list(c("County FE",  "X",  "X", "X"), c("Month FE",  "X",  "X", "X")),
dep.var.labels   = c("Any Police Presence", "Any Arrests Made", "Crowd Control Measures"), 
star.cutoffs = c(0.05), notes="$^{*}$p$<$0.05", notes.append=FALSE,
omit.stat=c("LL","ser","f"),
font.size="scriptsize",
label="protestcharacteristicsR6", title="Protest Characteristics and Police Response (Further Specifications)")


##################################################################################################
### pull a small sample of rows to look at details of protest coverage: discussed FN 6 in paper
set.seed(14609)
samplerows <- protestobsclean[sample(1:nrow(protestobsclean), 20),]

##################################################################################################
### Comparison between this dataset size and that of Williamson et al
### discussed in SI Section 1.2

### subset our main dataset to same time period as that paper:
onlyoneyear <- protestobsclean[protestobsclean$protestdate <= "2015-08-09", ]; dim(onlyoneyear) #662, compared to their 780.

### load in additional dataset:
### this got output at earlier point in the coding/merge process (before we drop campus protests) so columns are more limited
load("./ourdataset_withcampusprotests_trimmedforwilliamsoncomparison.Rdata")
dim(onlyoneyear_withcampus) #727

